#encoding=utf-8
from pyPdf import PdfFileWriter, PdfFileReader
import os
import time
import sys
from win32com.client import Dispatch, constants, gencache
#from __future__ import print_function 
from wand.image import Image 
'''
doc2pdf
# Requires Office 2007 SP2
# Requires the Python for Windows extensions (pywin32)
Reference: http://blog.csdn.net/rumswell/article/details/7434302

pdf2png
# Requires ImageMagick: http://www.imagemagick.org/download/binaries/
# Requires Wand: https://pypi.python.org/pypi/Wand

Troubleshooting: if Wand raises
wand.exceptions.DelegateError: PDFDelegateFailed `The system cannot find the file specified.
' @ error/pdf.c/ReadPDFImage/809
then Ghostscript (gs) needs to be installed:
https://www.ghostscript.com/download/gsdnld.html

'''

def word2pdf(input,output):
    """Convert a Word document to PDF by automating Microsoft Word over COM.

    Args:
        input: Absolute path of an existing Word document.
        output: Absolute path of the PDF file to write.

    Returns:
        True when the export completed without error and ``output`` exists
        afterwards; False when a path check fails or the export fails;
        -1 when setting up or shutting down Word raised an exception.
        Note that -1 is truthy, so callers should compare the result
        explicitly instead of relying on truthiness.
    """
    # The source document must exist.
    if not os.path.isfile(input):
        print(u'%s not exist' % input)
        return False
    # Both paths must be absolute: once Word has started, its current
    # directory is not the directory the script was invoked from.
    for path in (input, output):
        if not os.path.isabs(path):
            print(u'%s not absolute path' % path)
            return False

    try:
        print('%s %s' % (input, output))
        # Enable Python COM support for Word 2007.
        # This is generated by: makepy.py -i "Microsoft Word 12.0 Object Library"
        gencache.EnsureModule('{00020905-0000-0000-C000-000000000046}', 0, 8, 4)
        # Start the conversion.
        w = Dispatch("Word.Application")
        # Track whether the export itself succeeded, so that a stale output
        # file left over from a previous run is not reported as a success.
        exported = False
        try:
            doc = w.Documents.Open(input, ReadOnly=1)
            doc.ExportAsFixedFormat(
                output, constants.wdExportFormatPDF,
                Item=constants.wdExportDocumentWithMarkup,
                CreateBookmarks=constants.wdExportCreateHeadingBookmarks)
            exported = True
        except Exception as exc:
            print(' exception: %r' % (exc,))
        finally:
            # Always shut Word down, discarding any changes.
            w.Quit(constants.wdDoNotSaveChanges)

        if exported and os.path.isfile(output):
            print('translate success')
            return True
        print('translate fail')
        return False
    except Exception as exc:
        print(' exception: %r' % (exc,))
        return -1


def pdf2png(ifile,ofile,dpi=(300,300)):
    """Render a PDF to PNG using Wand and print basic image properties.

    Args:
        ifile: Path of the PDF file to read.
        ofile: Path passed to ``save`` for the converted PNG.
        dpi: Resolution handed to ``wand.image.Image`` when reading ``ifile``.
    """
    with Image(filename=ifile,resolution=dpi) as img:
        # Format each line explicitly: under Python 2, print('a', b) would
        # print the repr of a tuple instead of the two values.
        # One-element tuples keep tuple values (e.g. resolution) from being
        # unpacked as several format arguments.
        print('width = %s' % (img.width,))
        print('height = %s' % (img.height,))
        print('pages = %s' % (len(img.sequence),))
        print('resolution = %s' % (img.resolution,))
        print('format = %s' % (img.format,))
        print('background_color = %s' % (img.background_color,))
        with img.convert('png') as converted:
            converted.save(filename=ofile)


def part_pdf(input_file, output_file, config_count, f_w, now, file_name):
    """Split a PDF into ``config_count`` consecutive parts.

    Every part receives ``total_pages // config_count`` pages; the last part
    additionally receives the remainder. Each part is written through
    ``write_pdf``.

    Args:
        input_file: Path of the PDF to split.
        output_file: Prefix handed to ``write_pdf`` for the part file names.
        config_count: Number of parts to produce; must be at least 1.
        f_w: Writable file-like object used for the error log line.
        now: Timestamp string included in the log line.
        file_name: File name included in the log line.

    Raises:
        ValueError: If ``config_count`` is smaller than 1.
        SystemExit: With status 1, after logging, when ``config_count``
            exceeds the number of pages in the PDF.
    """
    if config_count < 1:
        raise ValueError('config_count must be >= 1, got %r' % (config_count,))

    pdf = getpdf(input_file)
    total_pages = len(pdf.pages)
    if config_count > total_pages:
        f_w.write('time: '+now+' file name: '+file_name+' status: part_num > pdf pages [error]\n')
        sys.exit(1)

    # Explicit floor division: page indexes must stay integers on every
    # Python version.
    pages_per_part = total_pages // config_count
    leftover = total_pages % config_count
    for part_index in range(config_count):
        first_page = part_index * pages_per_part
        end_page = first_page + pages_per_part
        if part_index == config_count - 1:
            # The last part also takes the pages that do not divide evenly.
            end_page += leftover
        write_pdf(pdf, first_page, end_page, part_index, output_file)
    
def write_pdf(pdf, part_count_ye, part_count_ye_end, fen, output_file):
    """Write a page range of ``pdf`` to its own PDF file.

    Args:
        pdf: Reader object providing ``getPage(index)``.
        part_count_ye: Index of the first page to copy (inclusive).
        part_count_ye_end: Index one past the last page to copy (exclusive).
        fen: Zero-based part index; the file name uses ``fen + 1``.
        output_file: Prefix of the output path; the file written is
            ``output_file + '_' + str(fen + 1) + '.pdf'``.
    """
    out = PdfFileWriter()
    for page_index in range(part_count_ye, part_count_ye_end):
        out.addPage(pdf.getPage(page_index))
    # The context manager closes the handle even if writing fails.
    with open(output_file+'_'+str(fen+1)+'.pdf', 'wb') as ous:
        out.write(ous)

def cut_pdf(ifile,ofile,leftsize,rightsize):
    """Crop every page of a PDF by resetting its media box, then save a copy.

    Args:
        ifile: Path of the PDF to read.
        ofile: Path of the cropped PDF to write.
        leftsize: Value assigned to each page's ``mediaBox.lowerLeft``
            (presumably an (x, y) pair in PDF units - confirm with callers).
        rightsize: Value assigned to each page's ``mediaBox.upperRight``
            (presumably an (x, y) pair in PDF units - confirm with callers).
    """
    # The input stays open until the output has been written.
    # NOTE(review): assumes the reader may still read page data from the
    # stream while writing - confirm against pyPdf.
    with open(ifile, 'rb') as src:
        pdf = PdfFileReader(src)
        out = PdfFileWriter()

        for page in pdf.pages:
            page.mediaBox.upperRight = rightsize
            page.mediaBox.lowerLeft = leftsize
            out.addPage(page)

        with open(ofile, 'wb') as ous:
            out.write(ous)

def getpdf(ifile):
    """Open ``ifile`` in binary mode and return a ``PdfFileReader`` for it.

    Args:
        ifile: Path of the PDF file to open.

    Returns:
        A ``PdfFileReader`` built on the opened file.
    """
    # Use the ``ifile`` argument and the imported ``PdfFileReader`` name;
    # the names ``path`` and ``pyPdf`` are not defined in this module.
    # The handle is deliberately left open for the returned reader.
    # NOTE(review): assumes the reader keeps reading from the stream after
    # construction - confirm against pyPdf.
    stream = open(ifile, 'rb')
    return PdfFileReader(stream)
    
    
def splitpdf():
    """Split the hard-coded sample PDF into two parts and log the outcome.

    Appends a status line to ``d:\\temp\\pp_log.txt``. The success line is
    written only if ``part_pdf`` returns normally.
    """
    config_count = 2
    now = time.strftime('%Y-%m-%d %H:%M:%S')
    ifile = r'E:\EDriver\Data\07_Task\01.AS400\Multiple Items\LP000114.pdf'
    # Used as a prefix by part_pdf/write_pdf, which append '_<n>.pdf'.
    ofile = r'E:\EDriver\Data\07_Task\01.AS400\Multiple Items\LP000114.pdf'
    # The context manager closes the log even when part_pdf exits the process
    # through sys.exit or raises.
    with open(r'd:\temp\pp_log.txt', 'a') as f_w:
        part_pdf(ifile, ofile, config_count, f_w, now, "LP000114.pdf")
        f_w.write('time: '+now+' file name: LP000114.pdf status: success\n')

def getContent(ifile,page):
    """Return the extracted text of one page of a PDF.

    Args:
        ifile: Path of the PDF file, opened through ``getpdf``.
        page: Page index passed to ``getPage``.

    Returns:
        The page text followed by a newline, with every ``\\xa0`` character
        replaced by a plain space.
    """
    reader = getpdf(ifile)
    page_text = reader.getPage(page).extractText()
    return (page_text + "\n").replace(u"\xa0", " ")

def showInfo(ifile):
    """Print and return the document information and page count of a PDF.

    Args:
        ifile: Path of the PDF file, opened through ``getpdf``.

    Returns:
        A ``(doc_info, num_pages)`` tuple holding the results of
        ``getDocumentInfo()`` and ``getNumPages()``.
    """
    pdf = getpdf(ifile)
    doc_info = pdf.getDocumentInfo()
    num_pages = pdf.getNumPages()
    # Bare expressions only echo a value in an interactive session; print
    # explicitly so the function also shows something when run from a script.
    print('document info = %r' % (doc_info,))
    print('pages = %s' % (num_pages,))
    return doc_info, num_pages

if __name__ == '__main__':
    # Sample paths for the optional cropping step, which is disabled below.
    ifile = r'E:\EDriver\Data\07_Task\01.AS400\Multiple Items\LP000114.pdf'
    ofile = r'E:\EDriver\Data\07_Task\01.AS400\Multiple Items\LP000114(2).pdf'
    # cut_pdf(ifile, ofile, (128, 232), (580, 800))

    # Convert the sample Word document to PDF.
    word2pdf(
        input=r"d:\temp\test.doc",
        output=r"d:\temp\test.pdf",
    )
